* Session setup: close any log left open (ignoring the error if none is open)
* and wipe everything currently held in memory
cap log close
clear all
* Turn off the --more-- pauses; the setting is stored permanently
set more off, permanently
* Fix the random-number seed
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Figure O.2: Share of difficult questions by order in the exam ///////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////

* Load the Phase 2 long-format dataset
use "Work Data/Gender_Phase2_long.dta", clear

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Keep only years before the affirmative action took place:
*    remove rows flagged aa_year==1 and all rows from year 2000
drop if aa_year == 1 | year == 2000
tabulate year

* 2) Drop Portuguese and Foreign Language (Phase 1 has no Portuguese or
*    Foreign Language exams - for Portuguese, Phase 1 has an essay).
*    The tabulations before and after show norm_p1score by subject.
tabulate subject, summarize(norm_p1score)
drop if inlist(subject, "lang", "port")
tabulate subject, summarize(norm_p1score)
* Remove the variables tied to the two dropped subjects
drop Language Portuguese prio_Language prio_Portuguese fem_prio_Language fem_prio_Portuguese

*********************************************************************************
**************** Scores by order ************************************************
*********************************************************************************

* Build score_item1 ... score_item12: for each item position i, copy the
* subject-specific variable <subject><i>_st2 into a common variable, using the
* row's own subject to pick the source column.
* The list of subjects does not change inside the loop, so it is computed once
* here instead of being recomputed on every iteration.
levelsof subject, local(subjects)

forvalues i = 1/12 {
    gen score_item`i' = .
    foreach s of local subjects {
        * Only rows of subject `s' take their value from `s'`i'_st2
        replace score_item`i' = `s'`i'_st2 if subject == "`s'"
    }
    * Check: distribution of the item score by subject
    tab subject, sum(score_item`i')
}

* Retain only the variables needed from here on
keep inscri2 female year subject score_item*

*********************************************************************************
**************** RESHAPE  *******************************************************
*********************************************************************************

* Reshape to student x subject x item:
* id_sub numbers each (inscri2, subject) combination and serves as the
* reshape identifier; the new variable "order" holds the item position
* taken from the score_item suffix.
egen id_sub = group(inscri2 subject)

reshape long score_item, i(id_sub) j(order)

*********************************************************************************
**************** Measures of item difficulty   **********************************
*********************************************************************************

**** Measures of item easiness/difficulty - Based on Iriberri & Rey-Biel (2019), Economic Journal

* Average score in a question/item: one value per year x subject x order cell,
* repeated on every row of that cell
bys year subject order: egen avg_score=mean(score_item)
tab subject, sum(avg_score)
tab year, sum(avg_score)
tab order,sum(avg_score)
label var avg_score "Question's average score"

* Difficulty dummy: 1 when the item's average score is below the median of
* avg_score within its year x subject, 0 otherwise.
* Stata treats missing as larger than any number, so a plain
* "avg_score>=median_avg_score" would code items with no recorded score as
* "not difficult". The if-qualifier leaves the dummy missing in that case so
* those items do not enter the share computed later.
bys year subject: egen median_avg_score=median(avg_score)
gen difficult = (avg_score < median_avg_score) if !missing(avg_score, median_avg_score)
tab difficult
tab difficult, sum(avg_score)
tab difficult, sum(score_item)

* Difficulty dummy - Conditional on not being zero
*
* positive_score and avg_score_pos are not created anywhere earlier in this
* script (and cannot survive the earlier keep), so they are built here.
* NOTE(review): the definitions below are reconstructed by analogy with
* score_item / avg_score - confirm they match the intended measure:
*   positive_score = item score, kept only when strictly greater than zero
*   avg_score_pos  = mean of positive_score within year x subject x order
gen positive_score = score_item if score_item > 0 & !missing(score_item)
bys year subject order: egen avg_score_pos = mean(positive_score)
label var avg_score_pos "Question's average score (positive scores only)"

* 1 when the item's average positive score is below the year x subject median.
* The if-qualifier keeps the dummy missing when either side is missing
* (missing compares as larger than any number in Stata).
bys year subject: egen median_avg_score_pos=median(avg_score_pos)
gen difficult_pos = (avg_score_pos < median_avg_score_pos) if !missing(avg_score_pos, median_avg_score_pos)
tab difficult_pos
tab difficult_pos, sum(avg_score_pos)
tab difficult_pos, sum(positive_score)

*********************************************************************************
**************** Difficulty level by question's order ***************************
*********************************************************************************

** String variable - Complete subject name
* The two word lists are paired by position: the k-th code maps to the k-th name.
* Rows whose subject matches none of the codes keep an empty subject_long.
local codes hist geog chem biol phy math lang port
local names History Geography Chemistry Biology Physics Mathematics Language Portuguese

gen subject_long = ""
forvalues k = 1/8 {
    local code : word `k' of `codes'
    local name : word `k' of `names'
    replace subject_long = "`name'" if subject == "`code'"
}
* Cross-check the mapping, showing missing values as well
tabulate subject_long subject, missing

label variable difficult "Difficult question"

foreach x in difficult {

    * Work on a temporary copy of the data; the full dataset comes back at restore
    preserve

    keep subject_long subject `x' `x'_pos order

    * Remember every variable's label (or its name when it has no label),
    * because collapse replaces variable labels
    foreach v of varlist _all {
        local keep_`v' : variable label `v'
        if `"`keep_`v''"' == "" {
            local keep_`v' "`v'"
        }
    }

    * Mean of each dummy by item position
    collapse (mean) `x' `x'_pos, by(order)

    * Put the remembered labels back on the collapsed variables
    foreach v of varlist _all {
        label variable `v' "`keep_`v''"
    }

    * The y-axis title is the label of the main dummy
    local ytxt : variable label `x'

    twoway ///
        (connected `x' order , lcolor(blue)  lpattern(solid)) ///
        (connected `x'_pos order, lcolor(red) lpattern(shortdash) msymbol(triangle)), ///
        xlabel(1(1)12) xtitle("Question's order") ytitle("`ytxt'") ///
        ylabel(0(0.1)0.8) ///
        legend(order(1 "All scores" 2 "Positive scores") position(6) cols(2))
    graph export "Output/order_`x'.pdf", as(pdf) replace

    restore
}
